# -*- coding:utf-8 -*-
import urllib.request,re
 
# Listing page to scrape and the HTTP headers sent with every request.
page = 1
url = 'http://www.qiushibaike.com/hot/page/' + str(page)
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}

# Article ids as they appear in the listing markup ("qiushi_tag_<digits>").
# `\d+` rather than `\d*`: a bare "qiushi_tag_" must not yield an empty id,
# which would otherwise produce a request for ".../article/" with no id.
pattern = re.compile(r'qiushi_tag_(\d+)')
# Captures the inner HTML of the first <div class="content"> that follows the
# "single-next-link" element on an article page.
pattern2 = re.compile(r'''<div id="single-next-link" title="下一条">[\d\D]*?<div class="content">([\d\D]*?)</div>''')
# One or more consecutive newlines (collapsed to a single newline on output).
n = re.compile(r'\n+')
# One or more consecutive <br/> tags. The group is required: in '<br/>+' the
# quantifier binds only to '>', so it would match '<br/>>>' instead.
br = re.compile(r'(?:<br/>)+')

# Seconds to wait on a blocking network operation before giving up.
TIMEOUT = 10


def _fetch(address):
    """Return the body of *address* decoded as UTF-8.

    The response is closed as soon as it has been read. Undecodable bytes
    are replaced instead of raising, so one malformed page cannot abort the
    whole run.

    Raises:
        OSError: on any network failure (``urllib.error.URLError`` and
            socket timeouts are both subclasses of it).
    """
    request = urllib.request.Request(address, headers=headers)
    with urllib.request.urlopen(request, timeout=TIMEOUT) as response:
        return response.read().decode('utf8', errors='replace')


def _extract_text(html):
    """Return the article text found in *html*, or ``None`` if not found.

    Runs of ``<br/>`` become a newline and runs of newlines are collapsed
    to a single newline.
    """
    match = pattern2.search(html)
    if match is None:
        return None
    text = br.sub('\n', match.group(1))
    return n.sub('\n', text)


def _report_error(error):
    """Print the HTTP status code and/or failure reason carried by *error*."""
    described = False
    if hasattr(error, 'code'):
        print(error.code)
        described = True
    if hasattr(error, 'reason'):
        print(error.reason)
        described = True
    if not described:
        # Plain OSError (e.g. a read timeout) has neither attribute.
        print(error)


def main():
    """Fetch the listing page and print the text of every article it links."""
    try:
        listing = _fetch(url)
    except OSError as e:
        _report_error(e)
        return

    # dict.fromkeys drops repeated ids while keeping first-seen order, so an
    # id that occurs several times in the markup is fetched only once.
    for article_id in dict.fromkeys(pattern.findall(listing)):
        try:
            article = _fetch('https://www.qiushibaike.com/article/' + article_id)
        except OSError as e:
            # A single broken article should not stop the remaining ones.
            _report_error(e)
            continue
        text = _extract_text(article)
        if text is not None:
            print(text)


if __name__ == '__main__':
    main()


